* Reset settings and initialize log file
launch, path("share/edemp")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Show seasonality in education employment in the CPS vs. CES.
*-------------------------------------------------------------------------------


* Prepare data and run models
*-------------------------------------------------------------------------------

if "$estimate" != "0" {
	* Load CPS data on education-sector employment
	gzuse pid tm month wtfinl tmspline* weeks empstat emp school using "$basepath/data/derived/cps_bms_sample_allages.dta.gz", clear
	keep if school == 1
	gen byte atw = (empstat == 10)
	gcollapse (sum) cpsedemp = emp (sum) cpsedatw = atw (first) month tmspline* weeks [pw = wtfinl], by(tm)
	tempfile cpsdata
	save `cpsdata'

	* Load CES data on total nonfarm & private/local/state education employment
	freduse PAYNSA CEU6561000001 CEU9092161101 CEU9093161101, clear

	* Clarify series meanings
	rename PAYNSA cesemp
	rename CEU6561000001 cesedemp_private
	rename CEU9092161101 cesedemp_state
	rename CEU9093161101 cesedemp_local

	* Compute CES employment in (i) education and (ii) other sectors
	gen cesedemp = cesedemp_private + cesedemp_state + cesedemp_local
	gen cesnoned = cesemp - cesedemp

	* Extract year-month
	gen int tm = ym(real(substr(date, 1, 4)), real(substr(date, 6, 2)))
	keep if inrange(tm, tm(1990m1), tm(2019m12))
	format %tm tm
	keep tm cesedemp cesnoned
	order tm cesedemp cesnoned

	* Merge in CPS employment
	merge 1:1 tm using `cpsdata', assert(2 3) keep(3) nogenerate
	tsset tm

	* Express employment in millions
	replace cpsedemp = cpsedemp/1000000
	replace cpsedatw = cpsedatw/1000000
	replace cesedemp = cesedemp/1000
	replace cesnoned = cesnoned/1000

	* Compute log employment
	foreach v of varlist cpsedemp cpsedatw cesedemp cesnoned {
		rename `v' `v'_lvl
		gen `v'_log = ln(`v'_lvl)
	}

	* Run the specification
	foreach v of varlist *_lvl *_log {
		quietly ivreg2 `v' ib5.month tmspline* weeks, bw($bandwidth) robust small
		process_estimates, path("edemp") model("`v'")
	}
}


* Prepare estimates
*-------------------------------------------------------------------------------

* Load estimates into memory
load_estimates, path("edemp")

* Prepare coefficient labels for each month
make_coeflabels


* Plot seasonality in education-sector employment
*-------------------------------------------------------------------------------

foreach v in "log" "lvl" {
	* Prepare background shading
	clear
	set obs 14

	if "`v'" == "log" {
		local vtitle "Log employment"
		local vlbl "log points"
		local rescale rescale(100)

		local yrange "-70 10"
		local ylabel "-70(10)10"

		gen rlow = -70
		gen rupp = 10
		gen rval = _n - 0.5
	}
	else if "`v'" == "lvl" {
		local vtitle "Employment level"
		local vlbl "millions"
		local rescale

		local yrange "-6 2"
		local ylabel "-6(1)2"

		gen rlow = -6
		gen rupp = 2
		gen rval = _n - 0.5
	}

	* Plot estimates
	#delimit ;
	coefplot
		(cpsedemp_`v', offset(-0.075) recast(connected) label("Employment, education sector"))
		(cpsedatw_`v', offset(-0.025) recast(connected) label("Presence at work, education sector"))
		(cesedemp_`v', offset(+0.025) recast(connected) label("Employment, education sector"))
		(cesnoned_`v', offset(+0.075) recast(connected) label("Employment, other sectors")),
		coeflabels(`coeflabels')
		title("{bf:`vtitle'}")
		xtitle("")
		xlabel(, alternate)
		ytitle("Difference rel. to May (`vlbl')")
		yscale(range(`yrange'))
		ylabel(`ylabel', format(%5.0f))
		addplot(
			rarea rupp rlow rval if inrange(rval, 0.5, 5.5), color($ltgs) plotregion(margin(t=0 b=0)) below ||
			scatteri 0 0.5 0 13.5, recast(line) color(black) below)
		vertical
		legend(rows(2) order(- "CPS:" 4 6 - "CES:" 8 10) size(*0.8))
		`rescale';
	#delimit cr

	tempfile g`v'
	graph save `g`v''
}

grc1leg "`glog'" "`glvl'"
nicepdf "$basepath/output/edemp.pdf", panels(2) indirect replace

* Close the log file
unlaunch
